TO DO: - make sure y axis is transformed for figures - export figures

will use narrow genetic dataset: -R = 1 & -min_maf = 0.05

1 Set up notebook

1.1 Load libraries & functions

source(here::here("libraries.R"))
source(here::here("functions.R"))

1.2 Import data

1.2.1 Urbanization data

urb <- read.csv(here::here("./clean_data/urb_metrics.csv")) %>%
  dplyr::mutate(site_id = as.factor(site_id),
                patch_id = as.factor(patch_id),
                urb_rur = as.factor(urb_rur),
                quadrant = as.factor(quadrant),
                river_valley = as.factor(river_valley)
                )

1.2.2 Genetic data

input_file <- here::here("./summary_stats/mmaf0.05_R1_populations_output/populations.sumstats_summary.tsv")


div <- extract_variant_position_summary(input_file) %>%
# if population doesn't start with "MWI", it should start with "MW"
  # clean names
  janitor::clean_names() %>%
  # rename col 1
  dplyr::rename(pop_id = 1) %>%
  dplyr::mutate(pop_id = as.factor(pop_id))


div_all <- extract_all_position_summary(input_file) %>%
# if population doesn't start with "MWI", it should start with "MW"
  # clean names
  janitor::clean_names() %>%
  # rename col 1
  dplyr::rename(pop_id = 1) %>%
  dplyr::mutate(pop_id = as.factor(pop_id))


### Add "MW" to numeric populations
diversity_all_sites <- add_MW_IDs(div_all)
diversity_variant_sites <- add_MW_IDs(div)

1.3 Join genetic data with urbanization data

# left vs. full join because some populations in urb_clean weren't genotyped due to lack of material
diversity_all_sites %<>%
  left_join(., urb, by = "patch_id") %>%
  dplyr::mutate(patch_id = as.factor(patch_id),
                pop_id = as.factor(pop_id))

diversity_variant_sites %<>%
  left_join(., urb, by = "patch_id") %>%
  dplyr::mutate(patch_id = as.factor(patch_id),
                pop_id = as.factor(pop_id))


# make new columns: u_r_dist & u_r_usc (urban or rural)
# this column is different from urb_rur, which I made when scouting sites for the 100 populations. I don't have those types of designations for the sites I imported from my previous observational study (note the NAs). So I'll calculate two new columns that assign a population an "urban" or "rural" status based on its distance from city center (u_r_dist) or urbanization score (u_r_usc)
diversity_variant_sites %<>%
  dplyr::mutate(u_r_dist = case_when(
    City_dist <= 40 ~ "Urban",
    TRUE ~ "Rural")) %>%
  dplyr::mutate(u_r_usc = case_when(
    urb_score > 0 ~ "Urban",
    TRUE ~ "Rural"))


diversity_all_sites %<>%
  dplyr::mutate(u_r_dist = case_when(
    City_dist <= 40 ~ "Urban",
    TRUE ~ "Rural")) %>%
  dplyr::mutate(u_r_usc = case_when(
    urb_score > 0 ~ "Urban",
    TRUE ~ "Rural"))

2 Does urbanization predict genetic diversity within pops?

2.1 Explore data

2.1.1 All sites

2.1.2 Variant sites

2.2 Urbanization is continuous / data = all sites

2.2.1 Fit linear models/diagnostics

2.2.1.1 Private alleles

This is 0 for each individual.

2.2.1.2 Inbreeding coefficient (FIS)

This is extremely zero-inflated. I’ve tried fitting models with zero inflation parameters w/glmmTMB but they don’t converge and/or diagnostics don’t pass tests. I’ll fit manual hurdle models instead.

2.2.1.2.1 City_dist
diversity_all_sites %<>%
  mutate(fis_bin = case_when(
    fis == 0 ~ 0,
    fis > 0 ~ 1,
    TRUE ~ 1)
  )
  
# binary
# doesn't converge w/num_indiv as random effect
fis_dist_bin <- glmmTMB(fis_bin ~ City_dist
                       # + (1|num_indv)
                        ,
           diversity_all_sites,
           family = "binomial")

performance::check_model(fis_dist_bin)

# quantitative
fis_dist_quant <- glmmTMB(fis * 1000 ~  City_dist
                           + (1|num_indv)
                        ,
           diversity_all_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_dist_quant)

2.2.1.2.2 Urb_score
# binary
fis_usc_bin <- glmmTMB(fis_bin ~ urb_score                           + (1|num_indv)
                        ,
           diversity_all_sites,
           family = "binomial")

performance::check_model(fis_usc_bin)

# quantitative
fis_usc_quant <- glmmTMB(fis * 1000 ~ urb_score                            + (1|num_indv)
                        ,
           diversity_all_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_usc_quant)

2.2.1.3 Pi

2.2.1.3.1 City_dist
pi_dist <- glmmTMB(pi ~ City_dist ,
           diversity_all_sites)

performance::check_model(pi_dist)

2.2.1.3.2 Urb_score
pi_usc <- glmmTMB(pi ~ urb_score ,
           diversity_all_sites)

performance::check_model(pi_usc)

2.2.2 ANOVA & effect sizes

mod_list <- list(# priv_dist,
                 # priv_usc,
                 fis_dist_bin,
                 fis_dist_quant,
                 fis_usc_bin,
                 fis_usc_quant,
                 pi_dist,
                 pi_usc)


all_anovas <- lapply(mod_list, create_anova_df) %>%
  do.call(rbind, .) %>%
  dplyr::mutate("Variable" = case_when(
      Variable == "fis * 1000" ~ "FIS (quantitative)",
      Variable == "fis * 10000" ~ "FIS (quantitative)",
      Variable == "fis" ~ "FIS (quantitative)",
      Variable == "fis_bin" ~ "FIS (binary)",
      Variable == "pi" ~ "Pi",
      Variable == "private" ~ "Private alleles",
      TRUE ~ "")) %>%
    dplyr::mutate("Urbanization" = case_when(
      Urbanization == "City_dist" ~ "Distance",
      Urbanization == "urb_score" ~ "Urbanization Score",
      TRUE ~ "")) %>%
  dplyr::select(1,5,2,3,4,7,6) %T>%
  view

# create flextable
all_anovas %>%
  dplyr::select(-Sig) %>%
  flextable::flextable() %>%
  flextable::compose(i = 1, j = 3, part = "header",
                     value = as_paragraph("Χ", as_sup("2"))) %>%
  flextable::compose(i = 1, j = 6, part = "header",
                     value = as_paragraph("R", as_sup("2"))) %>%
  bold(~ p <= 0.05) %>%
  flextable::autofit()

2.2.3 Figures

2.2.3.1 Private alleles

2.2.3.1.1 City_dist
ggplot(diversity_all_sites,
       aes(x = City_dist,
           y = private)) +
  geom_point(aes(x = City_dist,
           y = private)) +
  geom_line() +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Distance to City Center (km)") +
  ylab("Private Alleles") +
 # ylim(0, 500) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

#ggsave(here::here("./Figures_Tables/regressions/private_alleles.png"))
2.2.3.1.2 Urb_score
ggplot(diversity_all_sites,
       aes(x = urb_score,
           y = private)) +
  geom_point(aes(x = urb_score,
           y = private)) +
  geom_line() +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Urbanization Score") +
  ylab("Private Alleles")+
  xlim(4, -4) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.2.3.2 FIS

2.2.3.2.1 City_dist
# binary
fis_bin_dist_pred <- ggeffects::ggpredict(fis_dist_bin,
                                   terms = c("City_dist [all]"),
                                   type = "fe")

plot(fis_bin_dist_pred, 
     add.data = T,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Distance to City Center (km)") +
  ylab("FIS (binary)") +
  ylim(NA, 1) +
  labs(title = "") +
  theme_pubr()+
  labs_pubr() 

# quantitative
fis_quant_dist_pred <- ggeffects::ggpredict(fis_dist_quant,
                                   terms = c("City_dist"),
                                   type = "fe")

plot(fis_quant_dist_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Distance to City Center (km)") +
  ylab("FIS (quantitative)") +
 # ylim(NA, 1) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.2.3.2.2 Urb_score
# binary
fis_bin_usc_pred <- ggeffects::ggpredict(fis_usc_bin,
                                   terms = c("urb_score [all]"),
                                   type = "fe")

plot(fis_bin_usc_pred, 
     add.data = T,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Urbanization Score") +
  ylab("FIS (binary)") +
  ylim(NA, 1) +
  labs(title = "") +
  theme_pubr()+
  labs_pubr() +
  xlim(4, -4)

# quantitative
fis_quant_usc_pred <- ggeffects::ggpredict(fis_usc_quant,
                                   terms = c("urb_score"),
                                   type = "fe")
 
plot(fis_quant_usc_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Urbanization Score") +
  ylab("FIS (quantitative)") +
  labs(title = "") +
  theme_pubr()+
  labs_pubr() +
  xlim(4, -4)

2.2.3.3 Pi

2.2.3.3.1 City_dist
pi_dist_pred <- ggeffects::ggpredict(pi_dist,
                                   terms = c("City_dist"),
                                   type = "fe")

plot(pi_dist_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Distance to City Center (km)") +
  ylab("Pi") +
  #ylim(NA, 0.04) +
  labs(title = "") +
  theme_pubr()

ggsave(here::here("./Figures_Tables/regressions/pi.png"))
2.2.3.3.1.1 Urb_score
pi_usc_pred <- ggeffects::ggpredict(pi_usc,
                                   terms = c("urb_score"),
                                   type = "fe")

plot(pi_usc_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # ,
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Urbanization Score") +
  ylab("Pi") +
  #ylim(NA, 0.04) +
  labs(title = "") +
  theme_pubr()+
  xlim(4, -4)

2.2.3.4 Number of individuals per pop

2.2.3.4.1 City_dist
ggplot(diversity_all_sites,
       aes(x = City_dist,
           y = num_indv)) +
  xlab("Distance to City Center (km)") +
  ylab("Individuals/Population") +
  plot_aesthetics +
  theme_pubr() +
  labs_pubr() 

2.2.3.4.2 Urb_score
ggplot(diversity_all_sites,
       aes(x = urb_score,
           y = num_indv)) +
  xlab("Urbanization Score") +
  ylab("Individuals/Population") +
  plot_aesthetics +
  theme_pubr() +
  labs_pubr() +
  xlim(4, -4)

2.3 Urbanization is categorical / data = all sites

2.3.1 Fit linear models/diagnostics

2.3.1.1 Private alleles

This is 0 for all individuals.

2.3.1.2 Inbreeding coefficient (FIS)

This is extremely zero-inflated. I’ve tried fitting models with zero inflation parameters w/glmmTMB but they don’t converge and/or diagnostics don’t pass tests. I’ll fit manual hurdle models instead.

2.3.1.2.1 u_r_dist
diversity_all_sites %<>%
  mutate(fis_bin = case_when(
    fis == 0 ~ 0,
    fis > 0 ~ 1)
  )
  
# binary
fis_dist_bin_cat <- glmmTMB(fis_bin ~ u_r_dist 
                        + (1|num_indv)
                        ,
           diversity_all_sites,
           family = "binomial")

performance::check_model(fis_dist_bin_cat)

# quantitative
fis_dist_quant_cat <- glmmTMB(fis * 10 ~  u_r_dist 
                        + (1|num_indv)
                        ,
           diversity_all_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_dist_quant_cat)

2.3.1.2.2 u_r_usc
# binary
fis_usc_bin_cat <- glmmTMB(fis_bin ~ u_r_usc 
                        + (1|num_indv)
                        ,
           diversity_all_sites,
           family = "binomial")

performance::check_model(fis_usc_bin_cat)

# quantitative
fis_usc_quant_cat <- glmmTMB(fis * 10 ~ u_r_usc 
                        + (1|num_indv)
                        ,
           diversity_all_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_usc_quant_cat)

2.3.1.3 Pi

2.3.1.3.1 u_r_dist
pi_dist_cat <- glmmTMB(pi ~ u_r_dist ,
           diversity_all_sites)

performance::check_model(pi_dist_cat)

2.3.1.3.2 u_r_usc
pi_usc_cat <- glmmTMB(pi ~ u_r_usc,
           diversity_all_sites)

performance::check_model(pi_usc_cat)

2.3.2 ANOVA & effect sizes

mod_list_cat <- list(
  # priv_dist_cat,
  #                priv_usc_cat,
                 fis_dist_bin_cat,
                 fis_dist_quant_cat,
                 fis_usc_bin_cat,
                 fis_usc_quant_cat,
                 pi_dist_cat,
                 pi_usc_cat)

all_anovas_cat <- lapply(mod_list_cat, create_anova_df) %>%
  do.call(rbind, .) %>%
    dplyr::mutate("Variable" = case_when(
      Variable == "fis" ~ "FIS (quantitative)",
      Variable == "fis * 10" ~ "FIS (quantitative)",
      Variable == "fis_bin" ~ "FIS (binary)",
      Variable == "pi" ~ "Pi",
      # Variable == "private" ~ "Private alleles",
      TRUE ~ "")) %>%
  dplyr::mutate("Urbanization" = case_when(
      Urbanization == "u_r_dist" ~ "Distance (categorical)",
      Urbanization == "u_r_usc" ~ "Urbanization Score (categorical)",
      TRUE ~ "")) %>%
  dplyr::select(1,5,2,3,4,7,6) %T>%
  view

# create flextable
all_anovas_cat %>%
  dplyr::select( -Sig) %>%
  flextable::flextable() %>%
  flextable::compose(i = 1, j = 2, part = "header",
                     value = as_paragraph("Χ", as_sup("2"))) %>%
  flextable::compose(i = 1, j = 5, part = "header",
                     value = as_paragraph("R", as_sup("2"))) %>%
  bold(~ p <= 0.05) %>%
  flextable::autofit()

2.3.3 Figures

2.3.3.1 Private alleles

2.3.3.1.1 City_dist
ggplot(diversity_all_sites,
       aes(x = u_r_dist,
           y = private)) +
  geom_violin(aes(x = u_r_dist,
           y = private)) +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("Private Alleles") +
 # ylim(0, 500) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.3.3.1.2 Urb_score
ggplot(diversity_all_sites,
       aes(x = u_r_usc,
           y = private)) +
  geom_violin(aes(x = u_r_usc,
           y = private)) +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("Private Alleles") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.3.3.2 FIS

2.3.3.2.1 City_dist
# binary
fis_bin_dist_cat_pred <- ggeffects::ggpredict(fis_dist_bin_cat,
                                   terms = c("u_r_dist"),
                                   type = "fe")

plot(fis_bin_dist_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("FIS (binary)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

# quantitative
fis_quant_dist_cat_pred <- ggeffects::ggpredict(fis_dist_quant_cat,
                                   terms = c("u_r_dist"),
                                   type = "fe")

plot(fis_quant_dist_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("FIS (quantitative)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.3.3.2.2 Urb_score
# binary
fis_bin_usc_cat_pred <- ggeffects::ggpredict(fis_usc_bin_cat,
                                   terms = c("u_r_usc"),
                                   type = "fe")

plot(fis_bin_usc_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("FIS (binary)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

# quantitative
fis_quant_usc_cat_pred <- ggeffects::ggpredict(fis_usc_quant_cat,
                                   terms = c("u_r_usc"),
                                   type = "fe")

plot(fis_quant_usc_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("FIS (quantitative)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.3.3.3 Pi

2.3.3.3.1 City_dist
pi_dist_cat_pred <- ggeffects::ggpredict(pi_dist_cat,
                                   terms = c("u_r_dist"),
                                   type = "fe")

plot(pi_dist_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("Pi") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.3.3.3.1.1 Urb_score
pi_usc_cat_pred <- ggeffects::ggpredict(pi_usc_cat,
                                   terms = c("u_r_usc"),
                                   type = "fe")

plot(pi_usc_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("Pi") +
  labs(title = "") +
  theme_pubr()  +
  labs_pubr() 

2.4 Urbanization is continuous / data = ONLY VARIANT sites

2.4.1 Fit linear models/diagnostics

2.4.1.1 Private alleles

This is 0 for each individual.

2.4.1.2 Inbreeding coefficient (FIS)

This is extremely zero-inflated. I’ve tried fitting models with zero inflation parameters w/glmmTMB but they don’t converge and/or diagnostics don’t pass tests. I’ll fit manual hurdle models instead.

2.4.1.2.1 City_dist
diversity_variant_sites %<>%
  mutate(fis_bin = case_when(
    fis == 0 ~ 0,
    fis > 0 ~ 1,
    TRUE ~ 1)
  )
  
# binary
# doesn't converge w/num_indiv as random effect
fis_dist_bin <- glmmTMB(fis_bin ~ City_dist
                       # + (1|num_indv)
                        ,
           diversity_variant_sites,
           family = "binomial")

performance::check_model(fis_dist_bin)

# but num_indiv ~ City_dist is significant, so I think num_indivs is a lurking variable and this relationships isn't real


# quantitative
fis_dist_quant <- glmmTMB(fis ~  City_dist
                           + (1|num_indv)
                        ,
           diversity_variant_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_dist_quant)

2.4.1.2.2 Urb_score
# binary
# doesn't converge w/num_indiv as random effect
fis_usc_bin <- glmmTMB(fis_bin ~ urb_score                          # + (1|num_indv)
                        ,
           diversity_variant_sites,
           family = "binomial")

performance::check_model(fis_usc_bin)

# quantitative
fis_usc_quant <- glmmTMB(fis ~ urb_score                            + (1|num_indv)
                        ,
           diversity_variant_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_usc_quant)

2.4.1.3 Pi

2.4.1.3.1 City_dist
pi_dist <- glmmTMB(pi ~ City_dist ,
           diversity_variant_sites)

performance::check_model(pi_dist)

2.4.1.3.2 Urb_score
pi_usc <- glmmTMB(pi ~ urb_score ,
           diversity_variant_sites)

performance::check_model(pi_usc)

2.4.2 ANOVA & effect sizes

mod_list <- list(# priv_dist,
                 # priv_usc,
                 fis_dist_bin,
                 fis_dist_quant,
                 fis_usc_bin,
                 fis_usc_quant,
                 pi_dist,
                 pi_usc)


all_anovas <- lapply(mod_list, create_anova_df) %>%
  do.call(rbind, .) %>%
  dplyr::mutate("Variable" = case_when(
      Variable == "fis * 1000" ~ "FIS (quantitative)",
      Variable == "fis * 10000" ~ "FIS (quantitative)",
      Variable == "fis" ~ "FIS (quantitative)",
      Variable == "fis_bin" ~ "FIS (binary)",
      Variable == "pi" ~ "Pi",
      Variable == "private" ~ "Private alleles",
      TRUE ~ "")) %>%
    dplyr::mutate("Urbanization" = case_when(
      Urbanization == "City_dist" ~ "Distance",
      Urbanization == "urb_score" ~ "Urbanization Score",
      TRUE ~ "")) %>%
  dplyr::select(1,5,2,3,4,7,6) %T>%
  view

# create flextable
all_anovas %>%
  dplyr::select(-Sig) %>%
  flextable::flextable() %>%
  flextable::compose(i = 1, j = 3, part = "header",
                     value = as_paragraph("Χ", as_sup("2"))) %>%
  flextable::compose(i = 1, j = 6, part = "header",
                     value = as_paragraph("R", as_sup("2"))) %>%
  bold(~ p <= 0.05) %>%
  flextable::autofit()

2.4.3 Figures

2.4.3.1 Private alleles

2.4.3.1.1 City_dist
ggplot(diversity_variant_sites,
       aes(x = City_dist,
           y = private)) +
  geom_point(aes(x = City_dist,
           y = private)) +
  geom_line() +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Distance to City Center (km)") +
  ylab("Private Alleles") +
 # ylim(0, 500) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

#ggsave(here::here("./Figures_Tables/regressions/private_alleles.png"))
2.4.3.1.2 Urb_score
ggplot(diversity_variant_sites,
       aes(x = urb_score,
           y = private)) +
  geom_point(aes(x = urb_score,
           y = private)) +
  geom_line() +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Urbanization Score") +
  ylab("Private Alleles")+
  xlim(4, -4) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.4.3.2 FIS

2.4.3.2.1 City_dist
# binary
fis_bin_dist_pred <- ggeffects::ggpredict(fis_dist_bin,
                                   terms = c("City_dist [all]"),
                                   type = "fe")

plot(fis_bin_dist_pred, 
     add.data = T,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Distance to City Center (km)") +
  ylab("FIS (binary)") +
  ylim(NA, 1) +
  labs(title = "") +
  theme_pubr()+
  labs_pubr() 

# quantitative
fis_quant_dist_pred <- ggeffects::ggpredict(fis_dist_quant,
                                   terms = c("City_dist"),
                                   type = "fe")

plot(fis_quant_dist_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Distance to City Center (km)") +
  ylab("FIS (quantitative)") +
 # ylim(NA, 1) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.4.3.2.2 Urb_score
# binary
fis_bin_usc_pred <- ggeffects::ggpredict(fis_usc_bin,
                                   terms = c("urb_score [all]"),
                                   type = "fe")

plot(fis_bin_usc_pred, 
     add.data = T,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Urbanization Score") +
  ylab("FIS (binary)") +
  ylim(NA, 1) +
  labs(title = "") +
  theme_pubr()+
  labs_pubr() +
  xlim(4, -4)

# quantitative
fis_quant_usc_pred <- ggeffects::ggpredict(fis_usc_quant,
                                   terms = c("urb_score"),
                                   type = "fe")
 
plot(fis_quant_usc_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Urbanization Score") +
  ylab("FIS (quantitative)") +
  labs(title = "") +
  theme_pubr()+
  labs_pubr() +
  xlim(4, -4)

2.4.3.3 Pi

2.4.3.3.1 City_dist
pi_dist_pred <- ggeffects::ggpredict(pi_dist,
                                   terms = c("City_dist"),
                                   type = "fe")

plot(pi_dist_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Distance to City Center (km)") +
  ylab("Pi") +
  #ylim(NA, 0.04) +
  labs(title = "") +
  theme_pubr()

# ggsave(here::here("./Figures_Tables/regressions/pi.png"))
2.4.3.3.1.1 Urb_score
pi_usc_pred <- ggeffects::ggpredict(pi_usc,
                                   terms = c("urb_score"),
                                   type = "fe")

plot(pi_usc_pred, 
     add.data = F,
     colors = "bw") +
  geom_ribbon(aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              # ,
              # fill = "#66a182",
               alpha = 0.1
              ) +
  xlab("Urbanization Score") +
  ylab("Pi") +
  #ylim(NA, 0.04) +
  labs(title = "") +
  theme_pubr()+
  xlim(4, -4)

2.4.3.4 Number of individuals per pop

2.4.3.4.1 City_dist
ggplot(diversity_variant_sites,
       aes(x = City_dist,
           y = num_indv)) +
  xlab("Distance to City Center (km)") +
  ylab("Individuals/Population") +
  plot_aesthetics +
  theme_pubr() +
  labs_pubr() 

2.4.3.4.2 Urb_score
ggplot(diversity_variant_sites,
       aes(x = urb_score,
           y = num_indv)) +
  xlab("Urbanization Score") +
  ylab("Individuals/Population") +
  plot_aesthetics +
  theme_pubr() +
  labs_pubr() +
  xlim(4, -4)

2.5 Urbanization is categorical / data = ONLY VARIANT sites

2.5.1 Fit linear models/diagnostics

2.5.1.1 Private alleles

This is 0 for all individuals.

2.5.1.2 Inbreeding coefficient (FIS)

This is extremely zero-inflated. I’ve tried fitting models with zero inflation parameters w/glmmTMB but they don’t converge and/or diagnostics don’t pass tests. I’ll fit manual hurdle models instead.

2.5.1.2.1 u_r_dist
diversity_variant_sites %<>%
  mutate(fis_bin = case_when(
    fis == 0 ~ 0,
    fis > 0 ~ 1)
  )
  
# binary
fis_dist_bin_cat <- glmmTMB(fis_bin ~ u_r_dist 
                        + (1|num_indv)
                        ,
           diversity_variant_sites,
           family = "binomial")

performance::check_model(fis_dist_bin_cat)

# quantitative
fis_dist_quant_cat <- glmmTMB(fis ~  u_r_dist 
                        + (1|num_indv)
                        ,
           diversity_variant_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_dist_quant_cat)

2.5.1.2.2 u_r_usc
# binary
fis_usc_bin_cat <- glmmTMB(fis_bin ~ u_r_usc 
                        + (1|num_indv)
                        ,
           diversity_variant_sites,
           family = "binomial")

performance::check_model(fis_usc_bin_cat)

# quantitative
fis_usc_quant_cat <- glmmTMB(fis * 10 ~ u_r_usc 
                        + (1|num_indv)
                        ,
           diversity_variant_sites %>%
             dplyr::filter(fis != 0))

performance::check_model(fis_usc_quant_cat)

2.5.1.3 Pi

2.5.1.3.1 u_r_dist
pi_dist_cat <- glmmTMB(pi ~ u_r_dist ,
           diversity_variant_sites)

performance::check_model(pi_dist_cat)

2.5.1.3.2 u_r_usc
pi_usc_cat <- glmmTMB(pi ~ u_r_usc,
           diversity_variant_sites)

performance::check_model(pi_usc_cat)

2.5.2 ANOVA & effect sizes

mod_list_cat <- list(
  # priv_dist_cat,
  #                priv_usc_cat,
                 fis_dist_bin_cat,
                 fis_dist_quant_cat,
                 fis_usc_bin_cat,
                 fis_usc_quant_cat,
                 pi_dist_cat,
                 pi_usc_cat)

all_anovas_cat <- lapply(mod_list_cat, create_anova_df) %>%
  do.call(rbind, .) %>%
    dplyr::mutate("Variable" = case_when(
      Variable == "fis" ~ "FIS (quantitative)",
      Variable == "fis * 10" ~ "FIS (quantitative)",
      Variable == "fis_bin" ~ "FIS (binary)",
      Variable == "pi" ~ "Pi",
      # Variable == "private" ~ "Private alleles",
      TRUE ~ "")) %>%
  dplyr::mutate("Urbanization" = case_when(
      Urbanization == "u_r_dist" ~ "Distance (categorical)",
      Urbanization == "u_r_usc" ~ "Urbanization Score (categorical)",
      TRUE ~ "")) %>%
  dplyr::select(1,5,2,3,4,7,6) %T>%
  view

# create flextable
all_anovas_cat %>%
  dplyr::select( -Sig) %>%
  flextable::flextable() %>%
  flextable::compose(i = 1, j = 3, part = "header",
                     value = as_paragraph("Χ", as_sup("2"))) %>%
  flextable::compose(i = 1, j = 6, part = "header",
                     value = as_paragraph("R", as_sup("2"))) %>%
  bold(~ p <= 0.05) %>%
  flextable::autofit()

2.5.3 Figures

2.5.3.1 Private alleles

2.5.3.1.1 City_dist
ggplot(diversity_variant_sites,
       aes(x = u_r_dist,
           y = private)) +
  geom_violin(aes(x = u_r_dist,
           y = private)) +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("Private Alleles") +
 # ylim(0, 500) +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.5.3.1.2 Urb_score
ggplot(diversity_variant_sites,
       aes(x = u_r_usc,
           y = private)) +
  geom_violin(aes(x = u_r_usc,
           y = private)) +
  # geom_ribbon(fill = "#66a182",
  #              alpha = 0.1
  #             ) +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("Private Alleles") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.5.3.2 FIS

2.5.3.2.1 City_dist
# binary
fis_bin_dist_cat_pred <- ggeffects::ggpredict(fis_dist_bin_cat,
                                   terms = c("u_r_dist"),
                                   type = "fe")

plot(fis_bin_dist_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("FIS (binary)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

# quantitative
fis_quant_dist_cat_pred <- ggeffects::ggpredict(fis_dist_quant_cat,
                                   terms = c("u_r_dist"),
                                   type = "fe")

plot(fis_quant_dist_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("FIS (quantitative)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.5.3.2.2 Urb_score
# binary
fis_bin_usc_cat_pred <- ggeffects::ggpredict(fis_usc_bin_cat,
                                   terms = c("u_r_usc"),
                                   type = "fe")

plot(fis_bin_usc_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("FIS (binary)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

# quantitative
fis_quant_usc_cat_pred <- ggeffects::ggpredict(fis_usc_quant_cat,
                                   terms = c("u_r_usc"),
                                   type = "fe")

plot(fis_quant_usc_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("FIS (quantitative)") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.5.3.3 Pi

2.5.3.3.1 City_dist
pi_dist_cat_pred <- ggeffects::ggpredict(pi_dist_cat,
                                   terms = c("u_r_dist"),
                                   type = "fe")

plot(pi_dist_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nDistance to City Center)") +
  ylab("Pi") +
  labs(title = "") +
  theme_pubr() +
  labs_pubr() 

2.5.3.3.1.1 Urb_score
pi_usc_cat_pred <- ggeffects::ggpredict(pi_usc_cat,
                                   terms = c("u_r_usc"),
                                   type = "fe")

plot(pi_usc_cat_pred, 
     add.data = F,
     colors = "bw") +
  xlab("Urbanization (based on\nUrbanization Score)") +
  ylab("Pi") +
  labs(title = "") +
  theme_pubr()  +
  labs_pubr()